{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exploratory Data Analysis for VOC Segmentation\n",
    "\n",
    "In this notebook, we will explore the VOC 2012 dataset, visualize some samples, and analyze the distribution of classes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from PIL import Image\n",
    "import cv2\n",
    "\n",
    "# Define the path to the VOC2012 dataset\n",
    "data_dir = '../data/VOC2012/'\n",
    "image_dir = os.path.join(data_dir, 'JPEGImages')\n",
    "mask_dir = os.path.join(data_dir, 'SegmentationClass')\n",
    "\n",
    "# os.listdir returns entries in arbitrary order, so sort them to make every\n",
    "# run see the same files in the same order. Keep only the '.jpg' files,\n",
    "# because later cells derive the mask name from that extension.\n",
    "image_files = sorted(name for name in os.listdir(image_dir) if name.endswith('.jpg'))\n",
    "\n",
    "# Check the number of images\n",
    "print(f'Number of images: {len(image_files)}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to display images and their corresponding masks\n",
    "def display_samples(num_samples=5):\n",
    "    \"\"\"Plot up to `num_samples` images next to their segmentation masks.\n",
    "\n",
    "    Images are taken from `image_files` in order. An image is skipped when\n",
    "    no file named `<stem>.png` exists for it in `mask_dir`, so fewer than\n",
    "    `num_samples` rows are drawn if not enough image/mask pairs exist.\n",
    "\n",
    "    Args:\n",
    "        num_samples: Maximum number of image/mask rows to draw.\n",
    "    \"\"\"\n",
    "    # Not every image is guaranteed to have a mask (in the standard VOC2012\n",
    "    # layout only a subset of JPEGImages is annotated for segmentation), so\n",
    "    # collect existing pairs instead of assuming image_files[i] has one.\n",
    "    pairs = []\n",
    "    for image_file in image_files:\n",
    "        if len(pairs) >= num_samples:\n",
    "            break\n",
    "        # splitext swaps only the extension; str.replace would also rewrite\n",
    "        # a '.jpg' occurring elsewhere in the file name.\n",
    "        stem, _ = os.path.splitext(image_file)\n",
    "        mask_path = os.path.join(mask_dir, stem + '.png')\n",
    "        if os.path.isfile(mask_path):\n",
    "            pairs.append((os.path.join(image_dir, image_file), mask_path))\n",
    "\n",
    "    if not pairs:\n",
    "        print('No image/mask pairs found to display.')\n",
    "        return\n",
    "\n",
    "    # squeeze=False keeps `axes` two-dimensional even for a single row.\n",
    "    fig, axes = plt.subplots(len(pairs), 2, figsize=(15, 10), squeeze=False)\n",
    "    for (ax_img, ax_mask), (img_path, mask_path) in zip(axes, pairs):\n",
    "        # imshow copies the pixel data, so the files can be closed right after.\n",
    "        with Image.open(img_path) as img, Image.open(mask_path) as mask:\n",
    "            ax_img.imshow(img)\n",
    "            ax_mask.imshow(mask)\n",
    "        ax_img.axis('off')\n",
    "        ax_mask.axis('off')\n",
    "    plt.show()\n",
    "\n",
    "# Display some samples\n",
    "display_samples()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze class distribution\n",
    "# PASCAL VOC class names in label-index order: 0 is background, 1-20 are the\n",
    "# object classes. The position in this list must equal the label value stored\n",
    "# in the masks, so 'bottle' (label 5) must not be omitted.\n",
    "class_names = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair',\n",
    "               'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',\n",
    "               'train', 'tvmonitor']\n",
    "\n",
    "# class_counts[k] = number of masks in which label k appears at least once.\n",
    "class_counts = np.zeros(len(class_names), dtype=int)\n",
    "\n",
    "for mask_file in sorted(os.listdir(mask_dir)):\n",
    "    # Ignore anything in the directory that is not a PNG mask.\n",
    "    if not mask_file.endswith('.png'):\n",
    "        continue\n",
    "    with Image.open(os.path.join(mask_dir, mask_file)) as mask:\n",
    "        labels_present = np.unique(np.asarray(mask))\n",
    "    # Drop label values outside the class list (VOC marks void/boundary\n",
    "    # pixels with 255). np.unique returns distinct values, so each class is\n",
    "    # incremented at most once per mask.\n",
    "    valid_labels = labels_present[labels_present < len(class_names)]\n",
    "    class_counts[valid_labels] += 1\n",
    "\n",
    "# Plot class distribution\n",
    "plt.figure(figsize=(12, 6))\n",
    "plt.bar(class_names, class_counts)\n",
    "plt.xticks(rotation=90)\n",
    "plt.title('Class Distribution in VOC 2012 Segmentation Dataset')\n",
    "plt.xlabel('Classes')\n",
    "plt.ylabel('Counts')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}